Contents

import pandas as pd
from lets_plot import *
LetsPlot.setup_html()
import os
os.getcwd()
'/home/core/TSKITetude/docs/notebooks'

Define which chromosome to be plotted and the breed of interest. Also determine a second breed for comparisons. chr argument should be numeric

# import sys
# args = sys.argv
# chr=args[1]
# pop1=args[2]
# pop2=args[3]

# these are exmaple values. Must be changed!!!!
# 'LAT', 'CHR'
chr = 1
pop1 = 'LAT'
pop2 = 'CHR'
tajima = pd.DataFrame()

for chromosome in range(1, 2):
    tmp = pd.read_csv("WindowedTajima_" + str(chromosome) + ".csv")
    tmp.loc[:, "Chr"] = chromosome
    tajima = pd.concat([tajima, tmp])
plot = ggplot(data = tajima) + geom_line(aes(x = "Breakpoint", y = "TajimaD")) + \
    facet_wrap(facets = ["Chr", "Breed"], scales = "free") + ggtitle("TajimaD, remapped")
plot
ggsave(plot, "WindowedTajimaD_" + pop1 + ".png")
To export Lets-Plot figure to a PNG or PDF file please install CairoSVG libraryto your Python environment.
CairoSVG is free and distributed under the LGPL-3.0 license.
For more details visit: https://cairosvg.org/documentation/
# tajima not windowed

tajima = pd.read_csv("NotWindowedTajima_" + str(chr) + ".csv")
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[7], line 3
      1 # tajima not windowed
----> 3 tajima = pd.read_csv("NotWindowedTajima_" + str(chr) + ".csv")

File ~/.cache/pypoetry/virtualenvs/tskitetude-hh-GIRXc-py3.9/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1024, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
   1011 kwds_defaults = _refine_defaults_read(
   1012     dialect,
   1013     delimiter,
   (...)
   1020     dtype_backend=dtype_backend,
   1021 )
   1022 kwds.update(kwds_defaults)
-> 1024 return _read(filepath_or_buffer, kwds)

File ~/.cache/pypoetry/virtualenvs/tskitetude-hh-GIRXc-py3.9/lib/python3.9/site-packages/pandas/io/parsers/readers.py:618, in _read(filepath_or_buffer, kwds)
    615 _validate_names(kwds.get("names", None))
    617 # Create the parser.
--> 618 parser = TextFileReader(filepath_or_buffer, **kwds)
    620 if chunksize or iterator:
    621     return parser

File ~/.cache/pypoetry/virtualenvs/tskitetude-hh-GIRXc-py3.9/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1618, in TextFileReader.__init__(self, f, engine, **kwds)
   1615     self.options["has_index_names"] = kwds["has_index_names"]
   1617 self.handles: IOHandles | None = None
-> 1618 self._engine = self._make_engine(f, self.engine)

File ~/.cache/pypoetry/virtualenvs/tskitetude-hh-GIRXc-py3.9/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1878, in TextFileReader._make_engine(self, f, engine)
   1876     if "b" not in mode:
   1877         mode += "b"
-> 1878 self.handles = get_handle(
   1879     f,
   1880     mode,
   1881     encoding=self.options.get("encoding", None),
   1882     compression=self.options.get("compression", None),
   1883     memory_map=self.options.get("memory_map", False),
   1884     is_text=is_text,
   1885     errors=self.options.get("encoding_errors", "strict"),
   1886     storage_options=self.options.get("storage_options", None),
   1887 )
   1888 assert self.handles is not None
   1889 f = self.handles.handle

File ~/.cache/pypoetry/virtualenvs/tskitetude-hh-GIRXc-py3.9/lib/python3.9/site-packages/pandas/io/common.py:873, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
    868 elif isinstance(handle, str):
    869     # Check whether the filename is to be opened in binary mode.
    870     # Binary mode does not support 'encoding' and 'newline'.
    871     if ioargs.encoding and "b" not in ioargs.mode:
    872         # Encoding
--> 873         handle = open(
    874             handle,
    875             ioargs.mode,
    876             encoding=ioargs.encoding,
    877             errors=errors,
    878             newline="",
    879         )
    880     else:
    881         # Binary mode
    882         handle = open(handle, ioargs.mode)

FileNotFoundError: [Errno 2] No such file or directory: 'NotWindowedTajima_1.csv'
ggplot(data = tajima) + \
    geom_bar(aes(x = "Breed", y = "TajimaD"), stat = "identity") + \
        ggtitles(f"Chr{chr}'s TajimaD, not windowed")
ggsave(f"NotWindowedTajimaD_chr{chr}.png")
diversity = pd.DataFrame()
for chromosome in range(1, 28):
    tmp = pd.read_csv("WindowedDiversity_" + str(chromosome) + ".csv")
    tmp.loc[:, "Chr"] = chromosome
    diversity = pd.concat([diversity, tmp])
ggplot() + geom_line(aes(x = "Breakpoint", y = "Diversity"), data = diversity[diversity.Pop == pop1]) + \
    facet_wrap(facets = "Chr", scales = "free") + \
        geom_vline(aes(xintercept = "Breakpoint"), alpha = 0.4, colour = "red", data = breakpoint) + ggtitle("Diversity, remapped")
ggsave("WindowedDiversity_" + pop1 + ".png")
divergence = pd.DataFrame()
for chromosome in range(1, 27):
    tmp = pd.read_csv("WindowedDivergence_" + str(chromosome) + ".csv")
    tmp.loc[:, "Chr"] = chromosome
    divergence = pd.concat([divergence, tmp])
ggplot() + geom_line(aes(x = "Breakpoint", y = "Divergence"), data = divergence[(divergence.Pop1 == pop1) & (divergence.Pop2 == pop2)]) + \
    facet_wrap(facets = "Chr", scales = "free") + \
        geom_vline(aes(xintercept = "Breakpoint"), alpha = 0.4, colour = "red", data = breakpoint) + \
        ggtitle("Divergence, remapped, C Lineage")
ggsave("WindowedDivergence_" + pop1 + "_" + pop2 + ".png")
# TBD
# fst = pd.DataFrame()

# for chromosome in range(1, 28):
#     tmp = pd.read_csv("WindowedFst_" + str(chromosome) + ".csv")
#     tmp.loc[:, "Chr"] = chromosome
#     fst = pd.concat([fst, tmp])
# ggplot() + geom_line(aes(x = "Breakpoint", y = "Fst"), data = fst[(fst.Pop1 == pop1) & (fst.Pop2 == pop2)]) + \
#     facet_wrap(facets = "Chr", scales = "free") + \
#         geom_vline(aes(xintercept = "Breakpoint"), alpha = 1, colour = "red", data = breakpoint)